property <- read.csv("fy2023-property-assessment-data.csv")
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
property <- clean_names(property)
#creating a function to fiter outliers
filter_outliers <- function(x) {
quantile1 <- quantile(x, 0.25, na.rm = TRUE)
quantile3 <- quantile(x, 0.75, na.rm = TRUE)
iqr_value <- IQR(x, na.rm = TRUE)
lower_values <- quantile1 - 1.5 * iqr_value
upper_values <- quantile3 + 1.5 * iqr_value
outliers_removed <- ifelse(x < lower_values | x > upper_values, NA, x)
x_cleaned <- ifelse(is.na(outliers_removed), median(x, na.rm = TRUE), outliers_removed)
return(x_cleaned)
}
# Clean all numeric columns in the dataset
property_df_cleaned <- property %>%
mutate(across(where(is.numeric), filter_outliers))
# Remove rows with any NA values after cleaning
property_df_cleaned <-na.omit(property_df_cleaned)
property_extract<- subset(property_df_cleaned, select=
c(city, bldg_type, num_bldgs ,res_units, bldg_value, land_sf,land_value, total_value, gross_tax, yr_built, yr_remodel, structure_class, roof_structure, bed_rms, full_bth, lu, ext_fnished, heat_type, prop_view, ac_type ))
str(property_extract)
## 'data.frame': 180627 obs. of 20 variables:
## $ city : chr "EAST BOSTON" "EAST BOSTON" "EAST BOSTON" "EAST BOSTON" ...
## $ bldg_type : chr "RE - Row End" "RM - Row Middle" "RM - Row Middle" "RM - Row Middle" ...
## $ num_bldgs : int 1 1 1 1 1 1 1 1 1 1 ...
## $ res_units : num 3 3 3 3 3 3 3 3 3 3 ...
## $ bldg_value : int 588500 615300 599200 530200 569100 1027100 993200 876500 613100 802800 ...
## $ land_sf : num 1150 1150 1150 1150 2010 2500 2500 2500 2500 2500 ...
## $ land_value : int 195700 196500 197100 197700 225700 261200 262000 262700 263200 224400 ...
## $ total_value : int 784200 811800 796300 727900 794800 1288300 1255200 1139200 876300 1027200 ...
## $ gross_tax : num 8422 8719 8552 7818 8536 ...
## $ yr_built : int 1900 1920 1905 1900 1900 1900 1900 1900 1900 1900 ...
## $ yr_remodel : num 2004 2000 1985 1991 1978 ...
## $ structure_class: chr "" "" "" "" ...
## $ roof_structure : chr "F - Flat" "F - Flat" "F - Flat" "M - Mansard" ...
## $ bed_rms : num 6 3 5 5 6 3 3 3 5 6 ...
## $ full_bth : int 3 3 3 3 3 1 1 3 3 3 ...
## $ lu : chr "R3" "R3" "R3" "R3" ...
## $ ext_fnished : chr "A - Asbestos" "M - Vinyl" "M - Vinyl" "M - Vinyl" ...
## $ heat_type : chr "W - Ht Water/Steam" "F - Forced Hot Air" "S - Space Heat" "W - Ht Water/Steam" ...
## $ prop_view : chr "A - Average" "A - Average" "A - Average" "A - Average" ...
## $ ac_type : chr "N - None" "C - Central AC" "N - None" "N - None" ...
# Used summary function to group by City
city_grouped <- property_extract %>%
group_by(city) %>%
summarise(across(where(is.numeric),
list(
Mean = ~ mean(., na.rm = TRUE),
SD = ~ sd(., na.rm = TRUE),
Min = ~ min(., na.rm = TRUE),
Max = ~ max(., na.rm = TRUE),
Total = ~ sum(!is.na(.))
)))
# Replaced blank values with 'UNKNOWN'
city_grouped$city <- ifelse(city_grouped$city == "","UNKNOWN", city_grouped$city)
# Used kable function to create table
kable(city_grouped,
caption= "Descriptive Statistics for table grouped by CITY",
format = "html") %>%
kable_styling(full_width = FALSE)
| city | num_bldgs_Mean | num_bldgs_SD | num_bldgs_Min | num_bldgs_Max | num_bldgs_Total | res_units_Mean | res_units_SD | res_units_Min | res_units_Max | res_units_Total | bldg_value_Mean | bldg_value_SD | bldg_value_Min | bldg_value_Max | bldg_value_Total | land_sf_Mean | land_sf_SD | land_sf_Min | land_sf_Max | land_sf_Total | land_value_Mean | land_value_SD | land_value_Min | land_value_Max | land_value_Total | total_value_Mean | total_value_SD | total_value_Min | total_value_Max | total_value_Total | gross_tax_Mean | gross_tax_SD | gross_tax_Min | gross_tax_Max | gross_tax_Total | yr_built_Mean | yr_built_SD | yr_built_Min | yr_built_Max | yr_built_Total | yr_remodel_Mean | yr_remodel_SD | yr_remodel_Min | yr_remodel_Max | yr_remodel_Total | bed_rms_Mean | bed_rms_SD | bed_rms_Min | bed_rms_Max | bed_rms_Total | full_bth_Mean | full_bth_SD | full_bth_Min | full_bth_Max | full_bth_Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| UNKNOWN | 1 | 0 | 1 | 1 | 5 | 3.000000 | 0.0000000 | 3 | 3 | 5 | 332760.0 | 293463.8 | 7500 | 646900 | 5 | 4562.600 | 3430.438 | 2014 | 8320 | 5 | 0.00 | 0.00 | 0 | 0 | 5 | 501340.0 | 263423.7 | 31000 | 646900 | 5 | 7136.398 | 5603.741 | 332.940 | 15965.490 | 5 | 1965.200 | 43.66578 | 1920 | 2007 | 5 | 2006.200 | 4.919350 | 2004 | 2015 | 5 | 3.400000 | 0.8944272 | 3 | 5 | 5 | 0.400000 | 0.5477226 | 0 | 1 | 5 |
| ALLSTON | 1 | 0 | 1 | 1 | 4423 | 3.012661 | 0.2802447 | 2 | 9 | 4423 | 520738.6 | 279717.9 | 0 | 1413700 | 4423 | 2509.162 | 2197.625 | 100 | 10388 | 4423 | 104565.70 | 157528.91 | 0 | 561236 | 4423 | 635154.2 | 376256.4 | 0 | 1714500 | 4423 | 6680.372 | 4008.301 | 0.000 | 18453.467 | 4423 | 1931.358 | 35.22143 | 1812 | 2021 | 4423 | 1998.340 | 12.289278 | 1945 | 2021 | 4423 | 2.773231 | 1.4335540 | 0 | 7 | 4423 | 1.172055 | 0.8496236 | 0 | 3 | 4423 |
| BOSTON | 1 | 0 | 1 | 1 | 47104 | 3.060674 | 0.4811500 | 1 | 9 | 47104 | 543121.5 | 347347.8 | 0 | 1414100 | 47104 | 1491.213 | 1221.085 | 101 | 10420 | 47104 | 16799.13 | 79888.56 | 0 | 561600 | 47104 | 623043.3 | 395098.0 | 0 | 1715700 | 47104 | 6447.484 | 4374.329 | 0.000 | 18454.537 | 47104 | 1930.113 | 46.96075 | 1805 | 2022 | 47104 | 2000.898 | 12.582038 | 1945 | 2022 | 47104 | 2.134299 | 1.0470172 | 0 | 7 | 47104 | 1.017918 | 0.8000923 | 0 | 3 | 47104 |
| BRIGHTON | 1 | 0 | 1 | 1 | 11870 | 3.005392 | 0.2659766 | 1 | 9 | 11870 | 484036.3 | 255687.5 | 0 | 1413700 | 11870 | 2543.282 | 2338.322 | 113 | 10405 | 11870 | 107595.26 | 154279.18 | 0 | 562500 | 11870 | 594385.0 | 358777.3 | 0 | 1715200 | 11870 | 6280.716 | 3906.149 | 0.000 | 18452.390 | 11870 | 1936.164 | 32.97777 | 1826 | 2022 | 11870 | 1999.266 | 11.458126 | 1950 | 2022 | 11870 | 2.722915 | 1.5728890 | 0 | 7 | 11870 | 1.271609 | 0.7776868 | 0 | 3 | 11870 |
| BROOKLINE | 1 | 0 | 1 | 1 | 24 | 3.000000 | 0.0000000 | 3 | 3 | 24 | 691437.5 | 327954.3 | 0 | 1201800 | 24 | 4048.708 | 1706.775 | 881 | 7000 | 24 | 310387.50 | 201782.56 | 0 | 518100 | 24 | 983700.0 | 506916.4 | 0 | 1688100 | 24 | 9999.924 | 5828.865 | 0.000 | 18130.190 | 24 | 1919.583 | 12.51752 | 1900 | 1955 | 24 | 1998.375 | 13.044447 | 1979 | 2021 | 24 | 3.791667 | 1.2503623 | 2 | 7 | 24 | 1.250000 | 1.0320937 | 0 | 3 | 24 |
| CHARLESTOWN | 1 | 0 | 1 | 1 | 7230 | 2.996957 | 0.3609698 | 1 | 9 | 7230 | 555500.1 | 371028.2 | 0 | 1413900 | 7230 | 1433.702 | 1075.834 | 106 | 10386 | 7230 | 78861.19 | 145839.93 | 0 | 562500 | 7230 | 653957.4 | 431568.4 | 0 | 1714300 | 7230 | 6839.392 | 4737.971 | 0.000 | 18438.430 | 7230 | 1920.288 | 51.44167 | 1806 | 2022 | 7230 | 2004.248 | 9.194087 | 1950 | 2022 | 7230 | 2.521715 | 1.0546087 | 0 | 7 | 7230 | 1.280636 | 0.8441731 | 0 | 3 | 7230 |
| CHESTNUT HILL | 1 | 0 | 1 | 1 | 1017 | 3.000000 | 0.0000000 | 3 | 3 | 1017 | 419061.6 | 235049.3 | 0 | 1409900 | 1017 | 1834.948 | 1950.346 | 223 | 9944 | 1017 | 33939.33 | 91950.68 | 0 | 553400 | 1017 | 520106.3 | 364876.0 | 0 | 1657900 | 1017 | 5546.289 | 3924.105 | 0.000 | 17805.847 | 1017 | 1964.448 | 23.66984 | 1860 | 2014 | 1017 | 2001.299 | 9.463415 | 1978 | 2021 | 1017 | 1.965585 | 0.8322933 | 0 | 7 | 1017 | 1.314651 | 0.5913681 | 0 | 3 | 1017 |
| DEDHAM | 1 | 0 | 1 | 1 | 6 | 3.000000 | 0.0000000 | 3 | 3 | 6 | 316366.7 | 304932.0 | 0 | 822800 | 6 | 4680.667 | 2785.488 | 1330 | 8228 | 6 | 166383.33 | 120591.03 | 12700 | 281400 | 6 | 482750.0 | 404693.5 | 12700 | 1058400 | 6 | 5184.733 | 4346.408 | 136.400 | 11367.217 | 6 | 1944.167 | 31.37143 | 1920 | 2005 | 6 | 2005.167 | 2.857738 | 2004 | 2011 | 6 | 3.000000 | 0.6324555 | 2 | 4 | 6 | 1.000000 | 0.8944272 | 0 | 2 | 6 |
| DORCHESTER | 1 | 0 | 1 | 1 | 29212 | 3.009482 | 0.2831750 | 1 | 9 | 29212 | 425904.6 | 261471.4 | 0 | 1411800 | 29212 | 3517.647 | 2049.885 | 100 | 10423 | 29212 | 138801.47 | 115702.28 | 0 | 562400 | 29212 | 570512.4 | 326680.5 | 0 | 1713200 | 29212 | 6007.576 | 3652.608 | 0.000 | 18451.320 | 29212 | 1924.335 | 35.51287 | 1805 | 2022 | 29212 | 2003.973 | 8.644579 | 1945 | 2022 | 29212 | 3.605573 | 1.4174465 | 0 | 7 | 29212 | 1.499144 | 1.0505843 | 0 | 3 | 29212 |
| EAST BOSTON | 1 | 0 | 1 | 1 | 9951 | 3.030047 | 0.4148034 | 1 | 9 | 9951 | 462957.1 | 272373.6 | 0 | 1413300 | 9951 | 2142.705 | 1625.491 | 100 | 10358 | 9951 | 114900.59 | 118144.26 | 0 | 558500 | 9951 | 587681.2 | 334280.7 | 0 | 1714600 | 9951 | 6263.397 | 3750.557 | 0.000 | 18411.890 | 9951 | 1931.983 | 45.61467 | 1830 | 2022 | 9951 | 2004.525 | 8.762263 | 1950 | 2022 | 9951 | 3.150437 | 1.4207294 | 0 | 7 | 9951 | 1.465581 | 1.0411610 | 0 | 3 | 9951 |
| HYDE PARK | 1 | 0 | 1 | 1 | 9207 | 2.995981 | 0.1689739 | 2 | 9 | 9207 | 314932.1 | 175887.4 | 0 | 1412700 | 9207 | 4881.729 | 2392.979 | 100 | 10422 | 9207 | 156064.95 | 92243.90 | 0 | 558000 | 9207 | 477775.1 | 228762.2 | 0 | 1697900 | 9207 | 5056.070 | 2586.822 | 0.000 | 18285.410 | 9207 | 1937.738 | 32.50075 | 1810 | 2021 | 9207 | 2003.088 | 8.690376 | 1945 | 2021 | 9207 | 3.377104 | 1.1218745 | 0 | 7 | 9207 | 1.289454 | 0.8240628 | 0 | 3 | 9207 |
| JAMAICA PLAIN | 1 | 0 | 1 | 1 | 12104 | 2.994052 | 0.3702981 | 2 | 9 | 12104 | 520806.5 | 329397.4 | 0 | 1413700 | 12104 | 2882.166 | 2176.017 | 100 | 10414 | 12104 | 111333.38 | 162693.52 | 0 | 562500 | 12104 | 637322.9 | 420380.3 | 0 | 1715000 | 12104 | 6753.021 | 4621.264 | 0.000 | 18450.770 | 12104 | 1929.568 | 39.30802 | 1814 | 2021 | 12104 | 2003.425 | 10.101722 | 1945 | 2022 | 12104 | 3.023629 | 1.2324365 | 0 | 7 | 12104 | 1.345010 | 0.8614332 | 0 | 3 | 12104 |
| MATTAPAN | 1 | 0 | 1 | 1 | 4847 | 3.006602 | 0.1989390 | 2 | 9 | 4847 | 333446.9 | 204239.3 | 0 | 1413000 | 4847 | 4528.211 | 1927.585 | 115 | 10416 | 4847 | 146725.55 | 85171.72 | 0 | 549200 | 4847 | 483808.0 | 257296.4 | 0 | 1704500 | 4847 | 5083.583 | 2854.012 | 0.000 | 18154.610 | 4847 | 1933.737 | 29.02555 | 1820 | 2021 | 4847 | 2002.420 | 10.018523 | 1945 | 2021 | 4847 | 3.695069 | 1.2451567 | 0 | 7 | 4847 | 1.386218 | 0.9692975 | 0 | 3 | 4847 |
| NEWTON | 1 | NA | 1 | 1 | 1 | 3.000000 | NA | 3 | 3 | 1 | 414400.0 | NA | 414400 | 414400 | 1 | 5090.000 | NA | 5090 | 5090 | 1 | 215100.00 | NA | 215100 | 215100 | 1 | 629500.0 | NA | 629500 | 629500 | 1 | 6760.827 | NA | 6760.827 | 6760.827 | 1 | 1900.000 | NA | 1900 | 1900 | 1 | 2004.000 | NA | 2004 | 2004 | 1 | 4.000000 | NA | 4 | 4 | 1 | 2.000000 | NA | 2 | 2 | 1 |
| READVILLE | 1 | 0 | 1 | 1 | 2 | 3.000000 | 0.0000000 | 3 | 3 | 2 | 568500.0 | 112147.1 | 489200 | 647800 | 2 | 5079.500 | 4335.272 | 2014 | 8145 | 2 | 91000.00 | 128693.43 | 0 | 182000 | 2 | 719700.0 | 155704.9 | 609600 | 829800 | 2 | 3230.594 | 4568.749 | 0.000 | 6461.187 | 2 | 1960.000 | 86.26703 | 1899 | 2021 | 2 | 2004.000 | 0.000000 | 2004 | 2004 | 2 | 3.000000 | 0.0000000 | 3 | 3 | 2 | 0.000000 | 0.0000000 | 0 | 0 | 2 |
| ROSLINDALE | 1 | 0 | 1 | 1 | 9210 | 2.975462 | 0.3038920 | 1 | 9 | 9210 | 402995.8 | 215551.9 | 0 | 1399800 | 9210 | 4113.130 | 2230.198 | 113 | 10402 | 9210 | 142493.38 | 112938.21 | 0 | 545300 | 9210 | 548832.4 | 280573.1 | 0 | 1703806 | 9210 | 5870.318 | 3114.817 | 0.000 | 18273.070 | 9210 | 1931.258 | 33.58330 | 1810 | 2022 | 9210 | 2004.250 | 9.162412 | 1945 | 2022 | 9210 | 3.367427 | 1.1824828 | 0 | 7 | 9210 | 1.382193 | 0.8128541 | 0 | 3 | 9210 |
| ROXBURY | 1 | 0 | 1 | 1 | 6168 | 3.010052 | 0.3127932 | 1 | 9 | 6168 | 352143.0 | 279729.9 | 0 | 1413961 | 6168 | 2917.603 | 1969.704 | 107 | 10421 | 6168 | 103321.71 | 109489.80 | 0 | 561800 | 6168 | 468716.3 | 337425.4 | 0 | 1713100 | 6168 | 4743.391 | 3741.367 | 0.000 | 18299.890 | 6168 | 1928.715 | 42.71809 | 1821 | 2022 | 6168 | 2003.587 | 7.879435 | 1947 | 2022 | 6168 | 3.341278 | 1.1846411 | 0 | 7 | 6168 | 1.126135 | 1.0664532 | 0 | 3 | 6168 |
| ROXBURY CROSSING | 1 | 0 | 1 | 1 | 1832 | 3.008734 | 0.2743945 | 2 | 9 | 1832 | 511600.5 | 383344.3 | 0 | 1414100 | 1832 | 2532.479 | 1772.362 | 185 | 10412 | 1832 | 156740.27 | 187520.89 | 0 | 561700 | 1832 | 628109.6 | 429977.7 | 0 | 1713743 | 1832 | 6234.123 | 4929.640 | 0.000 | 18434.140 | 1832 | 1932.779 | 41.65142 | 1835 | 2021 | 1832 | 2003.543 | 8.788075 | 1950 | 2021 | 1832 | 3.173581 | 1.1102032 | 1 | 7 | 1832 | 1.162664 | 1.0723490 | 0 | 3 | 1832 |
| SOUTH BOSTON | 1 | 0 | 1 | 1 | 15439 | 3.026297 | 0.4889850 | 1 | 9 | 15439 | 540599.4 | 366421.9 | 0 | 1410600 | 15439 | 1623.014 | 1203.471 | 100 | 10418 | 15439 | 72626.13 | 132886.70 | 0 | 562400 | 15439 | 618473.1 | 420574.7 | 0 | 1715800 | 15439 | 6586.627 | 4566.800 | 0.000 | 18452.397 | 15439 | 1933.085 | 48.84513 | 1810 | 2021 | 15439 | 2004.314 | 7.956842 | 1950 | 2022 | 15439 | 2.658657 | 1.1137855 | 0 | 7 | 15439 | 1.296328 | 0.8623076 | 0 | 3 | 15439 |
| WEST ROXBURY | 1 | 0 | 1 | 1 | 10975 | 2.992893 | 0.1711478 | 2 | 8 | 10975 | 391766.7 | 229428.4 | 0 | 1414100 | 10975 | 4591.690 | 2337.698 | 110 | 10416 | 10975 | 179690.70 | 119162.34 | 0 | 561500 | 10975 | 579196.8 | 310047.0 | 0 | 1713000 | 10975 | 6118.703 | 3420.295 | 0.000 | 18309.547 | 10975 | 1937.725 | 29.00552 | 1804 | 2021 | 10975 | 2004.670 | 7.891918 | 1945 | 2022 | 10975 | 3.220046 | 0.9876027 | 0 | 7 | 10975 | 1.224966 | 0.7930328 | 0 | 3 | 10975 |
# Created a separate df with below heads for each
heat_type_grouped <- property_extract %>%
group_by(heat_type) %>%
summarise(across(where(is.numeric),
list(
Mean = ~ mean(., na.rm = TRUE),
SD = ~ sd(., na.rm = TRUE),
Min = ~ min(., na.rm = TRUE),
Max = ~ max(., na.rm = TRUE),
Total = ~ sum(!is.na(.))
)))
# Replaced blank values with 'UNKNOWN'
heat_type_grouped$heat_type <- ifelse(heat_type_grouped$heat_type == "","UNKNOWN", heat_type_grouped$heat_type)
kable(heat_type_grouped,
caption= "Descriptive Statistics for table grouped by HEAT TYPE",
format = "html") %>%
kable_styling(full_width = FALSE)
| heat_type | num_bldgs_Mean | num_bldgs_SD | num_bldgs_Min | num_bldgs_Max | num_bldgs_Total | res_units_Mean | res_units_SD | res_units_Min | res_units_Max | res_units_Total | bldg_value_Mean | bldg_value_SD | bldg_value_Min | bldg_value_Max | bldg_value_Total | land_sf_Mean | land_sf_SD | land_sf_Min | land_sf_Max | land_sf_Total | land_value_Mean | land_value_SD | land_value_Min | land_value_Max | land_value_Total | total_value_Mean | total_value_SD | total_value_Min | total_value_Max | total_value_Total | gross_tax_Mean | gross_tax_SD | gross_tax_Min | gross_tax_Max | gross_tax_Total | yr_built_Mean | yr_built_SD | yr_built_Min | yr_built_Max | yr_built_Total | yr_remodel_Mean | yr_remodel_SD | yr_remodel_Min | yr_remodel_Max | yr_remodel_Total | bed_rms_Mean | bed_rms_SD | bed_rms_Min | bed_rms_Max | bed_rms_Total | full_bth_Mean | full_bth_SD | full_bth_Min | full_bth_Max | full_bth_Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| UNKNOWN | 1 | 0 | 1 | 1 | 47845 | 3.077793 | 0.7121207 | 1 | 9 | 47845 | 208733.7 | 315449.8 | 0 | 1414100 | 47845 | 2938.097 | 2059.0450 | 100 | 10423 | 47845 | 62072.745 | 122235.18 | 0 | 562500 | 47845 | 305120.3 | 395241.1 | 0 | 1715400 | 47845 | 2796.512 | 4227.403 | 0.00 | 18452.39 | 47845 | 1930.399 | 34.35975 | 1809 | 2022 | 47845 | 2003.195 | 7.982582 | 1945 | 2022 | 47845 | 2.999352 | 0.0362817 | 0 | 4 | 47845 | 0.2312049 | 0.4312148 | 0 | 3 | 47845 |
| E - Electric | 1 | 0 | 1 | 1 | 5903 | 3.000678 | 0.0552210 | 2 | 6 | 5903 | 533664.7 | 245569.6 | 0 | 1410200 | 5903 | 1387.873 | 1462.2430 | 200 | 10290 | 5903 | 38228.991 | 89599.94 | 0 | 554800 | 5903 | 588274.9 | 284122.7 | 0 | 1715200 | 5903 | 6288.321 | 3087.277 | 0.00 | 18442.73 | 5903 | 1928.151 | 43.37957 | 1820 | 2021 | 5903 | 1997.272 | 13.164108 | 1950 | 2022 | 5903 | 2.063866 | 1.2108262 | 0 | 7 | 5903 | 1.3023886 | 0.5666724 | 1 | 3 | 5903 |
| F - Forced Hot Air | 1 | 0 | 1 | 1 | 49801 | 2.998996 | 0.0502903 | 1 | 6 | 49801 | 609425.9 | 253320.9 | 0 | 1413700 | 49801 | 2242.838 | 1938.0174 | 130 | 10422 | 49801 | 74704.410 | 117741.70 | 0 | 560900 | 49801 | 710966.0 | 295216.7 | 0 | 1715500 | 49801 | 7614.558 | 3194.420 | 0.00 | 18453.47 | 49801 | 1946.643 | 52.32474 | 1805 | 2022 | 49801 | 2005.902 | 8.118993 | 1945 | 2022 | 49801 | 2.676091 | 1.3914254 | 0 | 7 | 49801 | 1.6844441 | 0.6932265 | 0 | 3 | 49801 |
| N - None | 1 | 0 | 1 | 1 | 89 | 3.000000 | 0.0000000 | 3 | 3 | 89 | 453149.9 | 206383.0 | 97100 | 1342000 | 89 | 4565.787 | 2110.0804 | 774 | 9600 | 89 | 202719.787 | 86422.87 | 0 | 418800 | 89 | 651698.9 | 228759.3 | 171700 | 1272500 | 89 | 6995.384 | 2457.658 | 1844.06 | 13666.65 | 89 | 1920.955 | 30.73712 | 1866 | 2016 | 89 | 2002.865 | 7.616058 | 1980 | 2021 | 89 | 3.955056 | 1.4764062 | 1 | 7 | 89 | 1.9662921 | 0.8181109 | 1 | 3 | 89 |
| O - Other | 1 | 0 | 1 | 1 | 41 | 3.000000 | 0.0000000 | 3 | 3 | 41 | 558768.3 | 232427.7 | 10300 | 1299900 | 41 | 3182.488 | 2329.2779 | 460 | 8888 | 41 | 123126.829 | 124580.42 | 0 | 385200 | 41 | 686251.2 | 240723.1 | 247100 | 1685100 | 41 | 7355.669 | 2590.357 | 2653.85 | 18097.98 | 41 | 1949.512 | 50.70953 | 1865 | 2020 | 41 | 2004.878 | 9.233621 | 1979 | 2021 | 41 | 2.853658 | 1.4415439 | 1 | 7 | 41 | 1.6341463 | 0.6616683 | 1 | 3 | 41 |
| P - Heat Pump | 1 | 0 | 1 | 1 | 4619 | 3.000000 | 0.0000000 | 3 | 3 | 4619 | 686830.3 | 278793.7 | 144900 | 1413900 | 4619 | 1284.755 | 833.0167 | 280 | 9473 | 4619 | 5796.932 | 36741.34 | 0 | 486300 | 4619 | 780936.9 | 335075.7 | 155200 | 1715000 | 4619 | 8359.741 | 3636.198 | 0.00 | 18441.66 | 4619 | 1950.080 | 51.18480 | 1840 | 2021 | 4619 | 2000.201 | 11.828843 | 1975 | 2022 | 4619 | 1.771379 | 0.8433594 | 0 | 7 | 4619 | 1.4602728 | 0.5769989 | 1 | 3 | 4619 |
| S - Space Heat | 1 | 0 | 1 | 1 | 677 | 2.998523 | 0.0666009 | 2 | 4 | 677 | 604080.3 | 250784.0 | 0 | 1357600 | 677 | 1986.380 | 1258.5560 | 280 | 9000 | 677 | 208835.430 | 110051.45 | 0 | 560800 | 677 | 808742.4 | 319176.5 | 0 | 1715000 | 677 | 8647.871 | 3450.326 | 0.00 | 18419.10 | 677 | 1900.363 | 16.71625 | 1835 | 2020 | 677 | 2002.171 | 9.294661 | 1950 | 2022 | 677 | 4.011817 | 1.6371724 | 0 | 7 | 677 | 2.3604136 | 0.8444555 | 0 | 3 | 677 |
| W - Ht Water/Steam | 1 | 0 | 1 | 1 | 71652 | 2.999442 | 0.0659858 | 1 | 9 | 71652 | 534516.6 | 227739.7 | 0 | 1414100 | 71652 | 3035.156 | 2350.4034 | 150 | 10422 | 71652 | 140417.826 | 133263.99 | 0 | 562500 | 71652 | 680449.6 | 286073.8 | 0 | 1715800 | 71652 | 7285.557 | 3093.880 | 0.00 | 18454.54 | 71652 | 1919.102 | 30.46559 | 1804 | 2021 | 71652 | 2000.741 | 11.954586 | 1945 | 2022 | 71652 | 3.088023 | 1.6611277 | 0 | 7 | 71652 | 1.6210573 | 0.7563859 | 0 | 3 | 71652 |
# Created a separate df with below heads for each
prop_view_grouped <- property_extract %>%
group_by(prop_view) %>%
summarise(across(where(is.numeric),
list(
Mean = ~ mean(., na.rm = TRUE),
SD = ~ sd(., na.rm = TRUE),
Min = ~ min(., na.rm = TRUE),
Max = ~ max(., na.rm = TRUE),
Total = ~ sum(!is.na(.))
)))
# Replaced blank values with 'UNKNOWN'
prop_view_grouped$prop_view <- ifelse(prop_view_grouped$prop_view == "","UNKNOWN", prop_view_grouped$prop_view )
kable(prop_view_grouped,
caption= "Descriptive Statistics for table grouped by PROPERTY VIEW",
format = "html") %>%
kable_styling(full_width = FALSE)
| prop_view | num_bldgs_Mean | num_bldgs_SD | num_bldgs_Min | num_bldgs_Max | num_bldgs_Total | res_units_Mean | res_units_SD | res_units_Min | res_units_Max | res_units_Total | bldg_value_Mean | bldg_value_SD | bldg_value_Min | bldg_value_Max | bldg_value_Total | land_sf_Mean | land_sf_SD | land_sf_Min | land_sf_Max | land_sf_Total | land_value_Mean | land_value_SD | land_value_Min | land_value_Max | land_value_Total | total_value_Mean | total_value_SD | total_value_Min | total_value_Max | total_value_Total | gross_tax_Mean | gross_tax_SD | gross_tax_Min | gross_tax_Max | gross_tax_Total | yr_built_Mean | yr_built_SD | yr_built_Min | yr_built_Max | yr_built_Total | yr_remodel_Mean | yr_remodel_SD | yr_remodel_Min | yr_remodel_Max | yr_remodel_Total | bed_rms_Mean | bed_rms_SD | bed_rms_Min | bed_rms_Max | bed_rms_Total | full_bth_Mean | full_bth_SD | full_bth_Min | full_bth_Max | full_bth_Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| UNKNOWN | 1 | 0 | 1 | 1 | 46762 | 3.083465 | 0.6995163 | 1 | 9 | 46762 | 213292.0 | 317372.6 | 0 | 1414100 | 46762 | 2921.539 | 2054.522 | 100 | 10423 | 46762 | 63454.38 | 123235.32 | 0 | 562500 | 46762 | 311818.0 | 396888.8 | 0 | 1715400 | 46762 | 2857.406 | 4252.462 | 0.000 | 18452.39 | 46762 | 1930.479 | 33.93058 | 1810 | 2022 | 46762 | 2003.173 | 8.065095 | 1945 | 2022 | 46762 | 2.999359 | 0.0403097 | 0 | 6 | 46762 | 0.2137205 | 0.4199347 | 0 | 2 | 46762 |
| A - Average | 1 | 0 | 1 | 1 | 110120 | 2.997675 | 0.1144094 | 1 | 9 | 110120 | 548101.0 | 239026.0 | 0 | 1414100 | 110120 | 2786.564 | 2259.478 | 130 | 10422 | 110120 | 117133.71 | 130564.01 | 0 | 562500 | 110120 | 672112.8 | 289630.8 | 0 | 1715800 | 110120 | 7196.673 | 3129.552 | 0.000 | 18454.54 | 110120 | 1928.304 | 41.18939 | 1804 | 2022 | 110120 | 2002.687 | 10.821214 | 1945 | 2022 | 110120 | 2.979831 | 1.5628980 | 0 | 7 | 110120 | 1.6295768 | 0.7360373 | 0 | 3 | 110120 |
| E - Excellent | 1 | 0 | 1 | 1 | 4383 | 2.997718 | 0.0522807 | 1 | 3 | 4383 | 681014.6 | 282790.8 | 0 | 1413900 | 4383 | 1707.764 | 1220.896 | 245 | 9960 | 4383 | 20888.25 | 82780.68 | 0 | 562400 | 4383 | 838833.1 | 343989.6 | 0 | 1715500 | 4383 | 8942.843 | 3800.562 | 0.000 | 18439.51 | 4383 | 1967.383 | 48.49512 | 1816 | 2020 | 4383 | 2003.522 | 10.207317 | 1950 | 2022 | 4383 | 2.066393 | 1.1432815 | 0 | 7 | 4383 | 1.7246178 | 0.6675318 | 1 | 3 | 4383 |
| F - Fair | 1 | 0 | 1 | 1 | 6113 | 2.998364 | 0.0922235 | 2 | 6 | 6113 | 528987.9 | 250489.5 | 0 | 1411300 | 6113 | 2092.653 | 1920.749 | 170 | 10402 | 6113 | 119164.92 | 133328.72 | 0 | 559400 | 6113 | 652085.9 | 303785.6 | 0 | 1715800 | 6113 | 7000.959 | 3274.319 | 0.000 | 18438.43 | 6113 | 1914.331 | 36.54574 | 1806 | 2021 | 6113 | 1999.968 | 12.735458 | 1950 | 2022 | 6113 | 2.712743 | 1.7095923 | 0 | 7 | 6113 | 1.5951251 | 0.7615746 | 0 | 3 | 6113 |
| G - Good | 1 | 0 | 1 | 1 | 12405 | 3.000726 | 0.1531654 | 1 | 9 | 12405 | 677576.9 | 273359.9 | 0 | 1413000 | 12405 | 1633.498 | 1549.584 | 175 | 10407 | 12405 | 32344.30 | 93186.52 | 0 | 561000 | 12405 | 775131.8 | 322636.8 | 0 | 1715500 | 12405 | 8303.874 | 3498.403 | 0.000 | 18448.10 | 12405 | 1947.536 | 52.46475 | 1809 | 2022 | 12405 | 2002.061 | 11.777985 | 1950 | 2022 | 12405 | 2.030149 | 1.1493140 | 0 | 7 | 12405 | 1.5378476 | 0.6228142 | 0 | 3 | 12405 |
| P - Poor | 1 | 0 | 1 | 1 | 534 | 2.998127 | 0.1145849 | 2 | 5 | 534 | 588746.9 | 275432.5 | 0 | 1406400 | 534 | 1813.234 | 1654.182 | 175 | 9078 | 534 | 151871.30 | 146943.01 | 0 | 560500 | 534 | 737490.1 | 342281.8 | 0 | 1683300 | 534 | 7884.944 | 3706.175 | 0.000 | 18078.64 | 534 | 1904.974 | 34.10462 | 1820 | 2018 | 534 | 1999.320 | 14.945695 | 1950 | 2022 | 534 | 2.962547 | 1.7300191 | 0 | 7 | 534 | 1.7134831 | 0.8139991 | 1 | 3 | 534 |
| S - Special | 1 | 0 | 1 | 1 | 310 | 3.000000 | 0.0000000 | 3 | 3 | 310 | 521921.0 | 146638.2 | 489200 | 1231100 | 310 | 2482.684 | 1054.538 | 839 | 7557 | 310 | 0.00 | 0.00 | 0 | 0 | 310 | 670428.4 | 210576.6 | 609600 | 1697200 | 310 | 7160.354 | 2372.915 | 6461.187 | 18447.02 | 310 | 1997.629 | 31.89994 | 1900 | 2020 | 310 | 2006.110 | 5.571039 | 1990 | 2022 | 310 | 2.541936 | 0.7652544 | 1 | 6 | 310 | 2.0290323 | 0.6984465 | 1 | 3 | 310 |
# Created a sample of 10,000 values randomly
sample1 <- sample_n(property_extract,10000)
# Created a separate df with below heads for each grouped by prop view
sample1_grouped <- sample1 %>%
group_by(prop_view) %>%
summarise(across(where(is.numeric),
list(
Mean = ~ mean(., na.rm = TRUE),
SD = ~ sd(., na.rm = TRUE),
Min = ~ min(., na.rm = TRUE),
Max = ~ max(., na.rm = TRUE),
Total = ~ sum(!is.na(.))
)))
# Replaced blank values with 'UNKNOWN'
sample1_grouped$prop_view <- ifelse(sample1_grouped$prop_view == "","UNKNOWN", sample1_grouped$prop_view )
kable(sample1_grouped,
caption= "Descriptive Statistics for SAMPLE 1 grouped by PROPERTY VIEW",
format = "html") %>%
kable_styling(full_width = FALSE)
| prop_view | num_bldgs_Mean | num_bldgs_SD | num_bldgs_Min | num_bldgs_Max | num_bldgs_Total | res_units_Mean | res_units_SD | res_units_Min | res_units_Max | res_units_Total | bldg_value_Mean | bldg_value_SD | bldg_value_Min | bldg_value_Max | bldg_value_Total | land_sf_Mean | land_sf_SD | land_sf_Min | land_sf_Max | land_sf_Total | land_value_Mean | land_value_SD | land_value_Min | land_value_Max | land_value_Total | total_value_Mean | total_value_SD | total_value_Min | total_value_Max | total_value_Total | gross_tax_Mean | gross_tax_SD | gross_tax_Min | gross_tax_Max | gross_tax_Total | yr_built_Mean | yr_built_SD | yr_built_Min | yr_built_Max | yr_built_Total | yr_remodel_Mean | yr_remodel_SD | yr_remodel_Min | yr_remodel_Max | yr_remodel_Total | bed_rms_Mean | bed_rms_SD | bed_rms_Min | bed_rms_Max | bed_rms_Total | full_bth_Mean | full_bth_SD | full_bth_Min | full_bth_Max | full_bth_Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| UNKNOWN | 1 | 0 | 1 | 1 | 2639 | 3.095112 | 0.7167251 | 2 | 9 | 2639 | 211855.6 | 309928.1 | 0 | 1410100 | 2639 | 2949.981 | 2053.2866 | 106 | 10388 | 2639 | 60444.51 | 120733.76 | 0 | 561500 | 2639 | 307873.4 | 388765.0 | 0 | 1713000 | 2639 | 2839.393 | 4243.914 | 0.000 | 18287.00 | 2639 | 1931.022 | 34.50532 | 1850 | 2022 | 2639 | 2003.361 | 7.876098 | 1950 | 2022 | 2639 | 3.000000 | 0.0000000 | 3 | 3 | 2639 | 0.2068966 | 0.4134923 | 0 | 2 | 2639 |
| A - Average | 1 | 0 | 1 | 1 | 6038 | 2.997847 | 0.1280400 | 2 | 8 | 6038 | 542108.1 | 234181.1 | 0 | 1411900 | 6038 | 2767.004 | 2243.9249 | 175 | 10416 | 6038 | 117157.88 | 130669.77 | 0 | 557100 | 6038 | 669010.2 | 291258.6 | 0 | 1713600 | 6038 | 7163.742 | 3151.849 | 0.000 | 18454.54 | 6038 | 1927.243 | 41.04945 | 1808 | 2021 | 6038 | 2002.518 | 10.827786 | 1945 | 2022 | 6038 | 2.998675 | 1.5724979 | 0 | 7 | 6038 | 1.6425969 | 0.7418849 | 0 | 3 | 6038 |
| E - Excellent | 1 | 0 | 1 | 1 | 233 | 3.000000 | 0.0000000 | 3 | 3 | 233 | 670653.2 | 286681.6 | 0 | 1407500 | 233 | 1623.532 | 1094.1985 | 372 | 8835 | 233 | 26077.25 | 96012.74 | 0 | 559300 | 233 | 867436.5 | 374418.7 | 0 | 1712000 | 233 | 9257.605 | 4091.659 | 0.000 | 18386.88 | 233 | 1967.137 | 54.15677 | 1816 | 2020 | 233 | 2002.906 | 9.706007 | 1960 | 2021 | 233 | 2.064378 | 1.1142384 | 0 | 6 | 233 | 1.7553648 | 0.6729240 | 1 | 3 | 233 |
| F - Fair | 1 | 0 | 1 | 1 | 347 | 2.994236 | 0.0758092 | 2 | 3 | 347 | 559253.9 | 281197.8 | 0 | 1409400 | 347 | 2093.118 | 1885.5264 | 207 | 9602 | 347 | 121443.59 | 133125.83 | 0 | 530800 | 347 | 679035.2 | 320242.8 | 0 | 1619700 | 347 | 7273.361 | 3432.970 | 0.000 | 17395.58 | 347 | 1914.236 | 38.97689 | 1825 | 2019 | 347 | 2000.726 | 12.793264 | 1950 | 2021 | 347 | 2.688761 | 1.6746893 | 0 | 7 | 347 | 1.5850144 | 0.7645356 | 1 | 3 | 347 |
| G - Good | 1 | 0 | 1 | 1 | 690 | 3.002899 | 0.2409289 | 2 | 9 | 690 | 674122.2 | 279496.6 | 0 | 1406500 | 690 | 1634.370 | 1572.6911 | 312 | 10365 | 690 | 28144.20 | 88922.42 | 0 | 510500 | 690 | 775186.2 | 334498.9 | 0 | 1702400 | 690 | 8308.192 | 3607.061 | 0.000 | 18283.78 | 690 | 1953.788 | 52.52591 | 1832 | 2020 | 690 | 2002.130 | 11.694854 | 1950 | 2022 | 690 | 2.008696 | 1.1630173 | 0 | 7 | 690 | 1.5449275 | 0.6249612 | 1 | 3 | 690 |
| P - Poor | 1 | 0 | 1 | 1 | 38 | 3.000000 | 0.0000000 | 3 | 3 | 38 | 642939.5 | 311308.2 | 157800 | 1379700 | 38 | 2235.579 | 1903.9094 | 290 | 7133 | 38 | 165447.37 | 135939.21 | 0 | 473600 | 38 | 788121.1 | 335012.7 | 214800 | 1487200 | 38 | 8462.159 | 3599.301 | 2306.950 | 15972.53 | 38 | 1909.658 | 36.35952 | 1840 | 2009 | 38 | 1999.816 | 16.261138 | 1950 | 2021 | 38 | 2.789474 | 1.5795637 | 1 | 6 | 38 | 1.7105263 | 0.7678645 | 1 | 3 | 38 |
| S - Special | 1 | 0 | 1 | 1 | 15 | 3.000000 | 0.0000000 | 3 | 3 | 15 | 538493.3 | 190912.3 | 489200 | 1228600 | 15 | 1949.200 | 706.3606 | 1219 | 3696 | 15 | 0.00 | 0.00 | 0 | 0 | 15 | 650866.7 | 159825.1 | 609600 | 1228600 | 15 | 6910.119 | 1738.704 | 6461.187 | 13195.16 | 15 | 1996.133 | 29.61628 | 1900 | 2020 | 15 | 2007.933 | 5.945787 | 2004 | 2019 | 15 | 2.133333 | 0.3518658 | 2 | 3 | 15 | 2.0000000 | 0.3779645 | 1 | 3 | 15 |
# Summarizes statistics for Sample 1, including the mean values of building value, land value, gross tax, and bed rooms.
Sample1_summary<- data.frame(
sample1%>%
summarise(
Data_Frame = "Sample 1",
Mean_Building_Value = mean(bldg_value),
Mean_Land_Value = mean(land_value),
Mean_Gross_Tax = mean(gross_tax),
Mean_Bed_Rms = mean(bed_rms)
))
# Creating sample 2 with same cols as above
sample2 <- sample_n(property_extract,10000)
Sample2_summary<- data.frame(
sample2%>%
summarise(
Data_Frame = "Sample 2",
Mean_Building_Value = mean(bldg_value),
Mean_Land_Value = mean(land_value),
Mean_Gross_Tax = mean(gross_tax),
Mean_Bed_Rms = mean(bed_rms)
))
# Creating sample 3 with same cols as above
sample3 <- sample_n(property_extract,10000)
Sample3_summary<- data.frame(
sample3%>%
summarise(
Data_Frame = "Sample 3",
Mean_Building_Value = mean(bldg_value),
Mean_Land_Value = mean(land_value),
Mean_Gross_Tax = mean(gross_tax),
Mean_Bed_Rms = mean(bed_rms)
))
Main_df <- data.frame(
property_df_cleaned%>%
summarise(
Data_Frame = "Main df",
Mean_Building_Value = mean(bldg_value),
Mean_Land_Value = mean(land_value),
Mean_Gross_Tax = mean(gross_tax),
Mean_Bed_Rms = mean(bed_rms)
))
# Use binding function to make the comparison
combined_data <- bind_rows(Main_df, Sample1_summary, Sample2_summary, Sample3_summary)
# Print the combined data frame with a caption using kable
kable(combined_data, caption = "Table 1: Mean of Main dataframe combined with 3 Samples",
format = "html") %>%
kable_styling(full_width = FALSE)
| Data_Frame | Mean_Building_Value | Mean_Land_Value | Mean_Gross_Tax | Mean_Bed_Rms |
|---|---|---|---|---|
| Main df | 472968.9 | 95048.67 | 6187.053 | 2.887658 |
| Sample 1 | 468031.2 | 94083.58 | 6158.658 | 2.896100 |
| Sample 2 | 478639.1 | 94017.67 | 6270.704 | 2.867300 |
| Sample 3 | 478344.7 | 94991.95 | 6229.032 | 2.877500 |
The descriptive statistics presented in Table 1 provide insights into the central tendency and variability of Building Value, Land Value, Gross Tax, and Bed Rooms for the main data frame (Main df) and three samples (Sample 1, Sample 2, Sample 3). The “Mean” column indicates the average values for each variable in the respective datasets. Comparing the means across the main dataset and samples allows us to understand that the mean of sampled data aligns with the mean of main dataset.
There is not much difference on comparing which suggests that the sampled data is relatively representative of the main dataset for the considered variables (Building Value, Land Value, Gross Tax, and Bed Rooms). This suggests that the random sampling process for creating Sample 1, Sample 2, and Sample 3 has captured the average values of the variables in a way that is consistent with the overall dataset.
A jitter chart is used to visualize the distribution of a dataset when there is overlap in values. It is particularly useful in scenarios where there are discrete categories on one axis and continuous data on the other. Jitter chart avoids over plotting and overlapping.
A boxplot is a graphical representation that displays the distribution of a dataset and provides a visual summary of its key statistical measures. Box plot is used for detecting outliers. Boxplots allow for a quick visual inspection of the spread of the data.
# Creating a jitter plot for Land values
ggplot(property_df_cleaned, aes(x = heat_type, y = land_value)) +
geom_jitter(width = 0.2, height = 0, color = "blue", alpha = 0.7, size= 0.25) +
labs(title = "Jitter Plot for Land Values by Heat Type",
x = "Heat Type",
y = "Land Value") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 30, hjust = 1))+
scale_y_continuous(
breaks = seq(0,500000, by=50000),
limits = c(0,600000))
# Changing axis labels to non-scientific notation
options(scipen = 999)
The chart visualizes the distribution of land_value across different heat_type categories using jittered points.The land_value is represented by the y-axis, while the heat_type is represented by the x-axis. We don’t have adequate information about the sort of heat used. Among the available data, “hot water steam” is the most frequently used heat type followed by “forced hot air,” the next most used heat type.
par(mfrow = c(1, 2))
# Scatterplot 1: land_value vs. land_sf
plot(property_extract$land_value, property_extract$land_sf,
main = "Scatterplot: bed_rms vs. full_bth",
xlab = "Land Value", ylab = "Land Area", col = "blue", cex= 0.25)
# Scatterplot 2: yr_built vs. yr_remodel
plot(property_extract$yr_built, property_extract$yr_remodel,
main = "Scatterplot: yr_built vs. yr_remodel ",
xlab = "yr_built", ylab = "yr_remodel", col = "red", cex= 0.25)
# Reset the layout to default (1x1)
par(mfrow = c(1, 1))
Dispplayed two scatterplot side by side using par(). * First, the left scatterplot displays the relationship between land_value and land_sf. The plot illustrates the distribution of land values concerning land area. * The right scatterplot compares yr_built and yr_remodel,providing insight into the relationship between the year a property was built and the year it was remodeled.
# Create a basic plot
boxplot(bldg_value ~ prop_view,
data = property_extract,
main = "Box Plot for Property View",
col = "lightblue",
border = "blue",
cex= 0.5,
notch = FALSE,
notchwidth = 0.25,
xlab = "prop_view",
ylab = "bldg_value",
las= 2
)
The box plot above illustrates the distribution of building values (bldg_value) across different property views (prop_view). The horizontal line inside each box represents the median building value for the corresponding property view category. The boxes indicate the interquartile range (IQR), representing the middle 50% of the data.
ggplot(property_extract, aes(x = total_value , y = gross_tax)) +
geom_point(size=0.0025) +
labs(title = "Relationship between Land Area in sq_feet and Land Value",
x= "Land Area in sq",
y= "Land Value in $")
The scatter plot above visualizes the relationship between land area (in square feet) and land value (in dollars). The scatter plot shows an upward/positive trend between land area and land value. Positive trends indicates that the land value increases with an increase in land area. Land value and land area are directly proportional for this dataset.
V5<- data.frame(property_extract%>%
group_by(city)%>%
summarise(total =n()))
V5$city <- ifelse(V5$city == "",NA, V5$city)
V5<- na.omit(V5)
V5_plot <- ggplot(V5, aes(x = total, y= reorder (city,total), fill= city)) +
geom_bar(stat="identity") +
labs(title = "Total parcels in each city", x = "Total", y = "City")
ggplotly(V5_plot) %>% layout(showlegend = FALSE)
The above graph depicts the total number of parcels in each city. The above graph is plotted using ggplot. According to the chart above, Boston has the highest number of parcels that is 47,104 and Newton has the lowest number of parcels which equals to 1. There are a total of 19 cities in the dataset.